package com.wesabe.servlet.normalizers;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
/**
 * Normalizes URI fragments by URL-decoding them until the value stops changing
 * and then URL-encoding the stable value exactly once.
 * <p>
 * Decoding and encoding are done with {@link URLDecoder} and {@link URLEncoder}
 * using UTF-8. A fragment may carry at most {@code MAX_DECODE_DEPTH} layers of
 * encoding; anything that is still changing after {@code MAX_DECODE_DEPTH + 1}
 * decoding passes is rejected.
 *
 * @author coda
 */
public abstract class UriFragmentNormalizer implements Normalizer<String> {
    /** Maximum number of encoding layers a fragment may carry and still be accepted. */
    private static final int MAX_DECODE_DEPTH = 1;
    private static final String DEFAULT_CHARSET = "UTF-8";
    /** U+FFFD; its presence in decoded text is treated as a sign of invalid UTF-8 input. */
    private static final char REPLACEMENT_CHARACTER = '\uFFFD';

    /**
     * Returns the canonical, singly-encoded form of {@code fragment}.
     * <p>
     * Note that {@link URLDecoder} turns {@code '+'} into a space, so a fragment
     * containing {@code %2B} decodes to {@code '+'} and then to a space, which
     * counts as a further layer of encoding.
     *
     * @param fragment the raw fragment to normalize
     * @return the fully decoded fragment, re-encoded once with {@link URLEncoder}
     * @throws ValidationException if a decoded form of the fragment contains
     *         U+FFFD, if the fragment is still changing after
     *         {@code MAX_DECODE_DEPTH + 1} decoding passes, or if
     *         {@link URLDecoder} rejects it as malformed
     */
    @Override
    public String normalize(String fragment) throws ValidationException {
        try {
            String current = fragment;
            // One pass per permitted layer of encoding, plus one final pass whose
            // only job is to confirm that the value no longer changes.
            for (int pass = 0; pass <= MAX_DECODE_DEPTH; pass++) {
                final String decoded = URLDecoder.decode(current, DEFAULT_CHARSET);
                if (decoded.indexOf(REPLACEMENT_CHARACTER) != -1) {
                    throw new ValidationException(fragment, "cannot contain invalid UTF-8 codepoints");
                }
                if (decoded.equals(current)) {
                    // Stable: nothing left to decode, so emit the canonical encoding.
                    return URLEncoder.encode(current, DEFAULT_CHARSET);
                }
                current = decoded;
            }
            // Reaching this point means every pass changed the value, i.e. the input
            // carried at least MAX_DECODE_DEPTH + 1 layers of encoding.
            throw new ValidationException(fragment,
                    "was encoded " + (MAX_DECODE_DEPTH + 1) + " or more times");
        } catch (UnsupportedEncodingException e) {
            // UTF-8 is a charset every Java platform is required to support.
            throw new RuntimeException(e);
        } catch (IllegalArgumentException e) {
            // URLDecoder signals malformed escapes (e.g. a dangling '%') this way.
            throw new ValidationException(fragment, "had un-decodable characters");
        }
    }
}